# Replication package for 
# "Latent Territorial Threat and Democratic Regime Reversals"
# Johannes Karreth, Jaroslav Tir, and Douglas M. Gibler
# Forthcoming in the Journal of Peace Research, 2021
# jkarreth@ursinus.edu

# Please read "0_readme.html" before using this code.

# This file is `2_predict_TT.R`
# It generates territorial threat scores
# After this file, run `3_analyze_reversals.R` next

rm(list = ls())

######################################
### Load packages
######################################

set.seed(123)
library("tidyverse")

# Set working directory at the level of the replication folder

#################################################
# Setup
#################################################

TTm.dat <- read.csv("Work/TTm.csv")

#################################################
# BMA
#################################################

TTm.dat1946 <- dplyr::filter(TTm.dat, year >= 1946) # avoid conflict with stats::filter

TTbma.dat <- TTm.dat[, c("dyadid", "year", "ccode1", "ccode2", "age", "age.ln", "fatal.mid", "fatal.mid", "terr.mid.last5", "mid.peaceyears", "mid.peaceyears2", "mid.peaceyears3", "all.defensepact", "any.civilwar", "max.militarization", "samecolonizer", "violtransfer.count", "peacetransfer.count", "violtransfer.past", "peacetransfer.past", "min.neighbors", "max.neighbors", "dif.neighbors", "icowsal")]
# TTbma.dat <- na.omit(TTbma.dat)

TTbma.dat1946 <- TTm.dat1946[, c("dyadid", "year", "ccode1", "ccode2", "age", "age.ln", "fatal.mid", "fatal.mid", "terr.mid.last5", "mid.peaceyears", "mid.peaceyears2", "mid.peaceyears3", "all.defensepact", "any.civilwar", "max.militarization", "samecolonizer", "violtransfer.count", "peacetransfer.count", "violtransfer.past", "peacetransfer.past", "min.neighbors", "max.neighbors", "dif.neighbors", "icowsal")]

TTbma.dat$log.max.militarization <- log(TTbma.dat$max.militarization)
TTbma.dat1946$log.max.militarization <- log(TTbma.dat1946$max.militarization)

library("BMA")

TTbma.full <- bic.glm(fatal.mid ~ age.ln + all.defensepact + terr.mid.last5 + any.civilwar + log.max.militarization + samecolonizer + violtransfer.count + peacetransfer.count + max.neighbors + icowsal + mid.peaceyears + mid.peaceyears2 + mid.peaceyears3, data = na.omit(TTbma.dat), glm.family = "binomial")

summary(TTbma.full)

TTbma.1946 <- bic.glm(fatal.mid ~ age.ln + all.defensepact + terr.mid.last5 + any.civilwar + log.max.militarization + samecolonizer + violtransfer.count + peacetransfer.count + max.neighbors + icowsal + mid.peaceyears + mid.peaceyears2 + mid.peaceyears3, data = na.omit(TTbma.dat1946), glm.family = "binomial")

summary(TTbma.1946)

#################################################
# Random forests
#################################################

TTrf.dat <- TTbma.dat[, c("fatal.mid", "age", "age.ln", "terr.mid.last5", "mid.peaceyears", "mid.peaceyears2", "mid.peaceyears3", "all.defensepact", "any.civilwar", "log.max.militarization", "samecolonizer", "violtransfer.count", "peacetransfer.count", "violtransfer.past", "peacetransfer.past", "min.neighbors", "max.neighbors", "dif.neighbors", "icowsal")]

TTrf.dat1946 <- TTbma.dat1946[, c("fatal.mid", "age", "age.ln", "terr.mid.last5", "mid.peaceyears", "mid.peaceyears2", "mid.peaceyears3", "all.defensepact", "any.civilwar", "log.max.militarization", "samecolonizer", "violtransfer.count", "peacetransfer.count", "violtransfer.past", "peacetransfer.past", "min.neighbors", "max.neighbors", "dif.neighbors", "icowsal")]

library("randomForest")

TTfm.rf <- randomForest(factor(fatal.mid) ~ age.ln + all.defensepact + terr.mid.last5 + any.civilwar + log.max.militarization + samecolonizer + violtransfer.count + peacetransfer.count + max.neighbors + icowsal + mid.peaceyears + mid.peaceyears2 + mid.peaceyears3, data = TTrf.dat, importance = TRUE)

print(TTfm.rf)
round(importance(TTfm.rf), 2)
varImpPlot(TTfm.rf)

names(TTrf.dat1946) <- c("Fatal MID", "Border age", "Border age (logged)", "Territorial MID (last 5 years)", "Peace years", "Peace years (sq.)", "Peace years (cu.)", "Defense pact", "Civil war (any neighbor)", "Max. militarization (logged)", "Same colonizer", "Violent territorial transfer (count)", "Peaceful territorial transfer (count)", "Violent territorial transfer (past)", "Peaceful territorial transfer (past)", "Min. neighbors", "Max. neighbors", "Dif. neighbors", "ICOW claim salience (given year)")

TTrf.dat1946 <- TTrf.dat1946[, c("Fatal MID", "Border age", "Border age (logged)", "Territorial MID (last 5 years)", "Peace years", "Peace years (sq.)", "Peace years (cu.)", "Defense pact", "Civil war (any neighbor)", "Max. militarization (logged)", "Same colonizer", "Violent territorial transfer (count)", "Peaceful territorial transfer (count)", "Violent territorial transfer (past)", "Peaceful territorial transfer (past)", "Max. neighbors", "ICOW claim salience (given year)")]

TTfm.rf1946 <- randomForest(y = factor(TTrf.dat1946[, 1]), 
                            x = TTrf.dat1946[, 2:17],
                            data = TTrf.dat1946, importance = TRUE)

round(importance(TTfm.rf1946), 2)

pdf("Output/Figures/tt1946_varImpPlot.pdf", width = 12, height = 6)
varImpPlot(TTfm.rf1946, main = "")
dev.off()

#################################################
# Territorial threat model based on BMA
#################################################

TTbma.dat.pred <- na.omit(TTbma.dat)

TTfm.mod <- glm(fatal.mid ~ age.ln + all.defensepact + terr.mid.last5 + any.civilwar + log.max.militarization + samecolonizer + violtransfer.count + peacetransfer.count + max.neighbors + icowsal + mid.peaceyears + mid.peaceyears2 + mid.peaceyears3, data = TTbma.dat.pred, family = binomial, x = TRUE)

TTfm1946.mod <- glm(fatal.mid ~ age.ln + all.defensepact + terr.mid.last5 + any.civilwar + log.max.militarization + samecolonizer + violtransfer.count + peacetransfer.count + max.neighbors + icowsal + mid.peaceyears + mid.peaceyears2 + mid.peaceyears3, data = TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ], family = binomial, x = TRUE)

# compare 1816 model to 1946 model
texreg::screenreg(list(TTfm.mod, TTfm1946.mod))

# compare 1816 predictions to 1946 predictions
TTfm.mod.pluspred <- cbind(TTfm.mod$data, fitted.values(TTfm.mod))
cor(TTfm.mod.pluspred[TTfm.mod.pluspred$year >= 1946, ]$`fitted.values(TTfm.mod)`, fitted.values(TTfm1946.mod))

### Predicted territorial threat
TTbma.dat.pred$fm.yhat <- predict(TTfm.mod, se.fit = FALSE)
TTbma.dat.pred$pr.fmid <- (exp(TTbma.dat.pred$fm.yhat) / (1 + exp(TTbma.dat.pred$fm.yhat)))
TTbma.dat.pred$pr.fmid <- TTfm.mod$fitted.values

library("rstanarm")

# Calls to stan_glm are commented out because each model takes 
# quite some time to run.
# Instead, the code loads the saved model object.
# Un-comment the call to stan_glm to fit the model on your own.

# TTfm.stanmod <- stan_glm(fatal.mid ~ age.ln + all.defensepact + terr.mid.last5 + any.civilwar + log.max.militarization + samecolonizer + violtransfer.count + peacetransfer.count + max.neighbors + icowsal + mid.peaceyears + mid.peaceyears2 + mid.peaceyears3, 
#                        family = binomial(link = "logit"),
#                        data = TTbma.dat.pred, 
#                        prior = cauchy(location = 0, scale = 2.5, autoscale = TRUE),
#                        prior_intercept = cauchy(location = 0, scale = 10, autoscale = TRUE),
#                        QR = TRUE,
#                        cores = 4,
#                        iter = 5000,
#                        control = list(adapt_delta = 0.99))
# 
# save(TTfm.stanmod, file = "Work/TTfm1816_stan.RData")
load(file = "Work/TTfm1816_stan.RData")

# TTfm1946.stanmod <- stan_glm(fatal.mid ~ age.ln + all.defensepact + terr.mid.last5 + any.civilwar + log.max.militarization + samecolonizer + violtransfer.count + peacetransfer.count + max.neighbors + icowsal + mid.peaceyears + mid.peaceyears2 + mid.peaceyears3, 
#                             family = binomial(link = "logit"),
#                             data = TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ], 
#                             prior = cauchy(location = 0, scale = 2.5, autoscale = TRUE),
#                             prior_intercept = cauchy(location = 0, scale = 10, autoscale = TRUE),
#                             QR = TRUE,
#                             cores = 4,
#                             iter = 5000,
#                             control = list(adapt_delta = 0.99))

### Save for replication!!!

### see below for all analyses using the 1946-2016 period only
save(TTfm1946.stanmod, file = "Work/TTfm1946_stan.RData")
load(file = "Work/TTfm1946_stan.RData")

# with xtable

library("BayesPostEst")
tt_tab <- mcmcTab(TTfm1946.stanmod, Pr = TRUE)

tt_tab <- data.frame(tt_tab)
tt_tab <- add_row(.data = tt_tab, Median = (nrow(model.matrix(TTfm1946.stanmod))))

tt_tab$Variable <- c("Intercept", "Border age (logged)", "Defense pact", "Territorial MID (last 5 years", "Civil war (any neighbor)", "Max. militarization (logged)", "Same colonizer", "Violent territorial transfer (past)", "Peaceful territorial transfer (past)", "Max. neighbors", "ICOW claim salience (given year)", "Peace years", "Peace years (sq.)", "Peace years (cu.)", "N (dyad-years)")

tt_tab <- select(tt_tab, -Lower, -Upper)

names(tt_tab) <- c("", "Median", "Std. dev.", "Pr(Estimate)")

library("xtable")

tt_tabxt <- xtable(tt_tab, caption = "Posterior estimates from the predictive model used to generate the latent territorial threat measure. Outcome: Fatal militarized interstate disputes between contiguous countries, 1946-2016.",
                     label = "tab_tt1946", digits = 3,
                     align = "llccc")

# Multicolumn add from <http://stackoverflow.com/a/28420723>

print(tt_tabxt, file = "Output/Tables/tab_tt1946.tex",
      include.colnames = TRUE, include.rownames = FALSE,
      booktabs = TRUE, caption.placement = "top", size = "scriptsize", digits = 2)

### Convergence diagnostics

library("mcmcplots")
mcmcplot(rstan::As.mcmc.list(TTfm1946.stanmod$stanfit))

### Predicted probabilities

# Using rstanarm, I can skip the step-by-step version below in favor of using posterior_linpred (to get the predicted probability) or posterior_predict (to get the 0/1 prediction, which in this case is less useful...)

TTfm.stanmod.pred <- posterior_epred(object = TTfm.stanmod, draws = 1000, seed = 123)

TTfm.stanmod.meanpp <- apply(X = TTfm.stanmod.pred, MARGIN = 2, function(x) mean(x))
TTfm.stanmod.medianpp <- apply(X = TTfm.stanmod.pred, MARGIN = 2, function(x) median(x))
TTfm.stanmod.sdpp <- apply(X = TTfm.stanmod.pred, MARGIN = 2, function(x) sd(x))
TTfm.stanmod.10pp <- apply(X = TTfm.stanmod.pred, MARGIN = 2, function(x) quantile(x, probs = c(0.1)))
TTfm.stanmod.90pp <- apply(X = TTfm.stanmod.pred, MARGIN = 2, function(x) quantile(x, probs = c(0.9)))

# add to original dataframe

TTbma.dat.pred$mcppfm <- TTfm.stanmod.meanpp
TTbma.dat.pred$mcppfm50 <- TTfm.stanmod.medianpp
TTbma.dat.pred$mcppfmsd <- TTfm.stanmod.sdpp
TTbma.dat.pred$mcppfm10 <- TTfm.stanmod.10pp
TTbma.dat.pred$mcppfm90 <- TTfm.stanmod.90pp

# Version by hand (redundant)

# TTfm.stanmod.dat <- as.data.frame(as.matrix(TTfm.stanmod$stanfit))
# 
# Xb.fm <- apply(TTfm.stanmod.dat, 1, function(x) x[1] * 1 + x[2] * TTfm.stanmod$model[, 2] + x[3] * TTfm.stanmod$model[, 3] + x[4] * TTfm.stanmod$model[, 4] + x[5] * TTfm.stanmod$model[, 5] + x[6] * TTfm.stanmod$model[, 6] + x[7] * TTfm.stanmod$model[, 7] + x[8] * TTfm.stanmod$model[, 8] + x[9] * TTfm.stanmod$model[, 9] + x[10] * TTfm.stanmod$model[, 10] + x[11] * TTfm.stanmod$model[, 11] + x[12] * TTfm.stanmod$model[, 12] + x[13] * TTfm.stanmod$model[, 13] + x[14] * TTfm.stanmod$model[, 14])
# 
# TTfm.stanmod.pp <- exp(Xb.fm) / (1 + exp(Xb.fm))
# 
# TTfm.stanmod.meanpp <- apply(TTfm.stanmod.pp, 1, function(x) mean(x))

### ROC plot

# TTfm.mcmod.pp.dat <- as.data.frame(t(TTfm.mcmod.pp))
# ggs_rocplot(D = TTfm.mcmod.pp.dat, outcome = TTbma.dat.pred$fatal.mid)
# 
# # Firth's logit, correcting for separation
# 
# library(logistf)
# 
# TT.fmod <- logistf(fatal.mid ~ age.ln + terr.mid.last5 + any.civilwar + log.max.militarization + samecolonizer + violtransfer.count + peacetransfer.count + dif.neighbors + mid.peaceyears + mid.peaceyears2 + mid.peaceyears3, data = TTbma.dat.pred)
# 
# summary(TT.fmod$predict)

# Model fit
library("separationplot")
separationplot(pred = TTbma.dat.pred$mcppfm, actual = TTbma.dat.pred$fatal.mid, type = "line", line = TRUE, shuffle = TRUE, show.expected = TRUE, zerosfirst = TRUE, BW = FALSE, file = "Output/Figures/TTfm_sepplot.pdf")

### Now 1946-2016; add as separate variables

### Convergence diagnostics

mcmcplot(rstan::As.mcmc.list(TTfm1946.stanmod$stanfit))

### Predicted probabilities

TTfm1946.stanmod.pred <- posterior_epred(object = TTfm1946.stanmod, draws = 1000, seed = 123)

TTfm1946.stanmod.meanpp <- apply(X = TTfm1946.stanmod.pred, MARGIN = 2, function(x) mean(x))
TTfm1946.stanmod.medianpp <- apply(X = TTfm1946.stanmod.pred, MARGIN = 2, function(x) median(x))
TTfm1946.stanmod.sdpp <- apply(X = TTfm1946.stanmod.pred, MARGIN = 2, function(x) sd(x))
TTfm1946.stanmod.10pp <- apply(X = TTfm1946.stanmod.pred, MARGIN = 2, function(x) quantile(x, probs = c(0.1)))
TTfm1946.stanmod.90pp <- apply(X = TTfm1946.stanmod.pred, MARGIN = 2, function(x) quantile(x, probs = c(0.9)))

# add to original dataframe

TTbma.dat.pred$mcpp1946fm <- NA
TTbma.dat.pred$mcpp1946fm50 <- NA
TTbma.dat.pred$mcpp1946fmsd <- NA
TTbma.dat.pred$mcpp1946fm10 <- NA
TTbma.dat.pred$mcpp1946fm90 <- NA

TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ]$mcpp1946fm <- TTfm1946.stanmod.meanpp
TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ]$mcpp1946fm50 <- TTfm1946.stanmod.medianpp
TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ]$mcpp1946fmsd <- TTfm1946.stanmod.sdpp
TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ]$mcpp1946fm10 <- TTfm1946.stanmod.10pp
TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ]$mcpp1946fm90 <- TTfm1946.stanmod.90pp

# Comparison

cor(TTbma.dat.pred$mcppfm, TTbma.dat.pred$mcpp1946fm, use = "complete.obs")

# Model fit
separationplot(pred = TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ]$mcpp1946fm, actual = TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ]$fatal.mid, type = "line", line = TRUE, shuffle = TRUE, show.expected = TRUE, zerosfirst = TRUE, BW = FALSE, file = "Output/Figures/TTfm1946_sepplot.pdf")

# Comparison via other measures - AUC

TTfm1946_curves <- mcmcRocPrcGen(modelmatrix = model.matrix(TTfm1946.stanmod, data = TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ]), 
                              mcmcout = as.matrix(TTfm1946.stanmod), 
                              modelframe = model.frame(TTfm1946.stanmod, data = TTbma.dat.pred[TTbma.dat.pred$year >= 1946, ]), 
                              curves = TRUE,
                              link = "logit", fullsims = FALSE)

TTfm1946_curves$area_under_roc
TTfm1946_curves$area_under_prc

TTfm1946_roc <- ggplot(data = TTfm1946_curves$roc_dat, aes(x = x, y = y)) +
    geom_line() + 
    geom_abline(intercept = 0, slope = 1, color = "gray") + 
    labs(title = "ROC curve") + 
    xlab("1 - Specificity") + 
    ylab("Sensitivity") + theme_bw() + 
    annotate(geom = "text", x = 0.75, y = 0.25, label = "Area under curve: 0.93")

TTfm1946_prc <- ggplot(data = TTfm1946_curves$prc_dat, aes(x = x, y = y)) +
    geom_line() + 
    # geom_abline(intercept = 0, slope = 1, color = "gray") + 
    labs(title = "Precision-Recall curve") + 
    xlab("Recall") + 
    ylab("Precision") + theme_bw() + 
    annotate(geom = "text", x = 0.1, y = 0.25, label = "Area under curve: 0.413", hjust = 0)

library("cowplot")

ggsave(TTfm1946_roc, file = "Output/Figures/tt1946_rocplot.pdf", width = 4, height = 4)

pdf("Output/Figures/tt1946_curveplot.pdf", width = 8, height = 4)
plot_grid(TTfm1946_roc, TTfm1946_prc, ncol = 2)
dev.off()

### Generate dataset

# The resulting dataset will be TT.pred; I'll make one for c1 and c2 and then rbind them
TT.pred.c1 <- group_by(TTbma.dat.pred, ccode1, year, add = FALSE)
TT.pred.c1 <- dplyr::summarize(TT.pred.c1, 
                        max_terrthreatfm_mcpp = max(mcppfm), max_terrthreatfm_mcppSD = max(mcppfmsd), max_terrthreatfm_mcpp10 = max(mcppfm10), max_terrthreatfm_mcpp90 = max(mcppfm90),
                        max_terrthreatfm_mc1946pp = max(mcpp1946fm), max_terrthreatfm_mc1946ppSD = max(mcpp1946fmsd), max_terrthreatfm_mc1946pp10 = max(mcpp1946fm10), max_terrthreatfm_mc1946pp90 = max(mcpp1946fm90))

TT.pred.c2 <- group_by(TTbma.dat.pred, ccode2, year, add = FALSE)
TT.pred.c2 <- dplyr::summarize(TT.pred.c2, 
                        max_terrthreatfm_mcpp = max(mcppfm), max_terrthreatfm_mcppSD = max(mcppfmsd), max_terrthreatfm_mcpp10 = max(mcppfm10), max_terrthreatfm_mcpp90 = max(mcppfm90),
                        max_terrthreatfm_mc1946pp = max(mcpp1946fm), max_terrthreatfm_mc1946ppSD = max(mcpp1946fmsd), max_terrthreatfm_mc1946pp10 = max(mcpp1946fm10), max_terrthreatfm_mc1946pp90 = max(mcpp1946fm90))

TT.pred.c1m <- data.frame(ccode = TT.pred.c1$ccode1, year = TT.pred.c1$year, 
                          max_terrthreatfm_mcpp = TT.pred.c1$max_terrthreatfm_mcpp, max_terrthreatfm_mcppSD = TT.pred.c1$max_terrthreatfm_mcppSD, max_terrthreatfm_mcpp10 = TT.pred.c1$max_terrthreatfm_mcpp10, max_terrthreatfm_mcpp90 = TT.pred.c1$max_terrthreatfm_mcpp90,
                          max_terrthreatfm_mc1946pp = TT.pred.c1$max_terrthreatfm_mc1946pp, max_terrthreatfm_mc1946ppSD = TT.pred.c1$max_terrthreatfm_mc1946ppSD, max_terrthreatfm_mc1946pp10 = TT.pred.c1$max_terrthreatfm_mc1946pp10, max_terrthreatfm_mc1946pp90 = TT.pred.c1$max_terrthreatfm_mc1946pp90)

TT.pred.c2m <- data.frame(ccode = TT.pred.c2$ccode2, year = TT.pred.c2$year, 
                          max_terrthreatfm_mcpp = TT.pred.c2$max_terrthreatfm_mcpp, max_terrthreatfm_mcppSD = TT.pred.c2$max_terrthreatfm_mcppSD, max_terrthreatfm_mcpp10 = TT.pred.c2$max_terrthreatfm_mcpp10, max_terrthreatfm_mcpp90 = TT.pred.c2$max_terrthreatfm_mcpp90,
                          max_terrthreatfm_mc1946pp = TT.pred.c2$max_terrthreatfm_mc1946pp, max_terrthreatfm_mc1946ppSD = TT.pred.c2$max_terrthreatfm_mc1946ppSD, max_terrthreatfm_mc1946pp10 = TT.pred.c2$max_terrthreatfm_mc1946pp10, max_terrthreatfm_mc1946pp90 = TT.pred.c2$max_terrthreatfm_mc1946pp90)

TT.pred <- rbind(TT.pred.c1m, TT.pred.c2m)
TT.pred <- group_by(TT.pred, ccode, year, add = FALSE)

TT.pred <- dplyr::summarize(TT.pred, 
                     max_terrthreatfm_mcpp = max(max_terrthreatfm_mcpp), max_terrthreatfm_mcppSD = max(max_terrthreatfm_mcppSD), max_terrthreatfm_mcpp10 = max(max_terrthreatfm_mcpp10), max_terrthreatfm_mcpp90 = max(max_terrthreatfm_mcpp90), 
                     max_terrthreatfm_mc1946pp = max(max_terrthreatfm_mc1946pp), max_terrthreatfm_mc1946ppSD = max(max_terrthreatfm_mc1946ppSD), max_terrthreatfm_mc1946pp10 = max(max_terrthreatfm_mc1946pp10), max_terrthreatfm_mc1946pp90 = max(max_terrthreatfm_mc1946pp90))

TT.pred <- arrange(TT.pred, ccode, year)

table(TT.pred$ccode)
dim(TT.pred[TT.pred$year > 1945, ])

library("countrycode")
TT.pred$cname <- countrycode(sourcevar = TT.pred$ccode, origin = "cown", destination = "country.name")
TT.pred$cshort <- countrycode(sourcevar = TT.pred$ccode, origin = "cown", destination = "iso3c")
TT.pred$region <- countrycode(sourcevar = TT.pred$ccode, origin = "cown", destination = "region")

# Fix:
# 245 Bavaria
TT.pred[TT.pred$ccode == 245, ]$cname <- "Bavaria"
# 260 Germany Federal Republic
TT.pred[TT.pred$ccode == 260, ]$cname <- "Germany Federal Republic"
# 267 Baden
TT.pred[TT.pred$ccode == 267, ]$cname <- "Baden"
# 300 Austria-Hungary
TT.pred[TT.pred$ccode == 300, ]$cname <- "Austria-Hungary"
# 678 Yemen Arab Republic
TT.pred[TT.pred$ccode == 678, ]$cname <- "Yemen Arab Republic"
# 680 Yemen People's Republic
TT.pred[TT.pred$ccode == 680, ]$cname <- "Yemen People's Republic"
# 730 Korea
TT.pred[TT.pred$ccode == 730, ]$cname <- "Korea"
# 731 South Korea
TT.pred[TT.pred$ccode == 731, ]$cname <- "South Korea"
# 817 Vietnam
TT.pred[TT.pred$ccode == 816, ]$cname <- "Vietnam"

TT.pred <- filter(TT.pred, ccode != 817)

#################################################
# Final dataset for analyses of backsliding
#################################################

write.csv(arrange(TT.pred, ccode, year), "Work/TTpred.csv", row.names = FALSE)
save(TT.pred, file = "Work/TTpred.RData")


#################################################
# Summary statistics for estimation model
#################################################

library("pastecs")
library("knitr")

summary.mf <- model.frame(fatal.mid ~ age.ln + all.defensepact + terr.mid.last5 + any.civilwar + log.max.militarization + samecolonizer + violtransfer.count + peacetransfer.count + max.neighbors + icowsal + mid.peaceyears, data = TTbma.dat.pred)
summary.mf1946 <- model.frame(fatal.mid ~ age.ln + all.defensepact + terr.mid.last5 + any.civilwar + log.max.militarization + samecolonizer + violtransfer.count + peacetransfer.count + max.neighbors + icowsal + mid.peaceyears, data = filter(TTbma.dat.pred, year >= 1946))

summary.dat <- t(stat.desc(summary.mf))
summary.dat1946 <- t(stat.desc(summary.mf1946))

summaryind.dat <- summary.dat[, c( "mean", "std.dev", "min", "max", "nbr.val")]
summaryind.dat1946 <- summary.dat1946[, c( "mean", "std.dev", "min", "max", "nbr.val")]

summaryind.tab <- round(summaryind.dat, digits = 2)
summaryind.tab1946 <- round(summaryind.dat1946, digits = 2)

rownames(summaryind.tab) <- c("Fatal territorial MID in given year", "Border age (logged)", "Defense pact", "Territorial MID (last 5 years)", "Civil war (any neighbor)", "Max. militarization (logged)", "Same colonizer", "Violent territorial transfer (past)", "Peaceful territorial transfer (past)", "Max. neighbors", "ICOW claim salience", "Peace years")
rownames(summaryind.tab1946) <- c("Fatal territorial MID in given year", "Border age (logged)", "Defense pact", "Territorial MID (last 5 years)", "Civil war (any neighbor)", "Max. militarization (logged)", "Same colonizer", "Violent territorial transfer (past)", "Peaceful territorial transfer (past)", "Max. neighbors", "ICOW claim salience", "Peace years")

colnames(summaryind.tab) <- c("Mean/Proportion", "Std. Dev.", "Min.", "Max.", "N (dyad-years)")
colnames(summaryind.tab1946) <- c("Mean/Proportion", "Std. Dev.", "Min.", "Max.", "N (dyad-years)")

summaryind.tab[, 5] <- as.character(summaryind.tab[, 5])
summaryind.tab1946[, 5] <- as.character(summaryind.tab1946[, 5])

kable(summaryind.tab, format = "markdown", row.names = TRUE, caption = "Summary statistics")
kable(summaryind.tab1946, format = "markdown", row.names = TRUE, caption = "Summary statistics")

library("xtable")
summaryind.xtab <- xtable(summaryind.tab, caption = "Summary statistics for models of fatal MIDs between contiguous dyads, 1816-2016.", label = "tab_tt_sumstat", digits = 2, align = "lccccc")
summaryind.xtab1946 <- xtable(summaryind.tab1946, caption = "Summary statistics for models of fatal MIDs between contiguous dyads, 1946-2016.", label = "tab_tt_sumstat1946", digits = 2, align = "lccccc")

print(summaryind.xtab, file = "Output/Tables/tab_tt_sumstat.tex", include.colnames = TRUE, include.rownames = TRUE, booktabs = TRUE, caption.placement = "top", size = "footnotesize", digits = 2)
print(summaryind.xtab1946, file = "Output/Tables/tab_tt_sumstat1946.tex", include.colnames = TRUE, include.rownames = TRUE, booktabs = TRUE, caption.placement = "top", size = "footnotesize", digits = 2)

#################################################
# Summarize/describe TT
#################################################

TTfm.list <- TT.pred[, c("ccode", "year", 
                         "max_terrthreatfm_mcpp", "max_terrthreatfm_mcpp10", "max_terrthreatfm_mcpp90", 
                         "max_terrthreatfm_mc1946pp", "max_terrthreatfm_mc1946pp10", "max_terrthreatfm_mc1946pp90",
                         "cname")]

TTfm.list$cname2 <- stringi::stri_trans_totitle(TTfm.list$cname)

TTfm.list[TTfm.list$cname2 == "Venezuela, Bolivarian Republic Of", ]$cname2 <- "Venezuela"
TTfm.list[TTfm.list$cname2 == "Lao People's Democratic Republic", ]$cname2 <- "Laos"
TTfm.list[TTfm.list$cname2 == "Russian Federation", ]$cname2 <- "Russia"
TTfm.list[TTfm.list$cname2 == "Austria-hungary", ]$cname2 <- "Austria-Hungary"
TTfm.list[TTfm.list$cname2 == "Côte D’ivoire", ]$cname2 <- "Côte D'Ivoire"
TTfm.list[TTfm.list$cname2 == "Guinea-bissau", ]$cname2 <- "Guinea-Bissau"
TTfm.list[TTfm.list$cname2 == "Myanmar (burma)", ]$cname2 <- "Myanmar (Burma)"
TTfm.list[TTfm.list$cname2 == "Timor-leste", ]$cname2 <- "Timor-Leste" 

TTfm.list$decade <- floor(TTfm.list$year / 10) * 10
TTfm.list$decade2 <- paste(TTfm.list$decade, "s", sep = "")
TTfm.list$cyear <- paste(TTfm.list$cname2, TTfm.list$year)
TTfm.list$cdec <- paste(TTfm.list$cname2, TTfm.list$decade2)

### By country-decade
### All years after 1816

TTfm.list.dec <- dplyr::summarize(group_by(TTfm.list, cdec, add = FALSE), 
                           ttfm_mean = max(max_terrthreatfm_mcpp) * 100, ttfm_lower = max(max_terrthreatfm_mcpp10) * 100, ttfm_upper = max(max_terrthreatfm_mcpp90) * 100,
                           tt1946fm_mean = max(max_terrthreatfm_mc1946pp) * 100, tt1946fm_lower = max(max_terrthreatfm_mc1946pp10) * 100, tt1946fm_upper = max(max_terrthreatfm_mc1946pp90) * 100)
rows <- nrow(TTfm.list.dec)

# select top 60 TT and bottom 10 TT
TTfm.list.dec2 <- arrange(TTfm.list.dec, -tt1946fm_mean)[c(1:50), ]

TT.dec.all <- ggplot(dat = TTfm.list.dec2, aes(y = tt1946fm_mean, x = reorder(cdec, tt1946fm_mean))) + geom_pointrange(aes(y = tt1946fm_mean, x = reorder(cdec, tt1946fm_mean), ymin = tt1946fm_lower, ymax = tt1946fm_upper))  + coord_flip() + ylab("Latent territorial threat") + xlab("") + theme_bw()

### Only post-WWII

TTfm.list.dec <- dplyr::summarize(group_by(TTfm.list[TTfm.list$year > 1945, ], cdec, add = FALSE), 
                           ttfm_mean = max(max_terrthreatfm_mcpp) * 100, ttfm_lower = max(max_terrthreatfm_mcpp10) * 100, ttfm_upper = max(max_terrthreatfm_mcpp90) * 100,
                           tt1946fm_mean = max(max_terrthreatfm_mc1946pp) * 100, tt1946fm_lower = max(max_terrthreatfm_mc1946pp10) * 100, tt1946fm_upper = max(max_terrthreatfm_mc1946pp90) * 100)
rows <- nrow(TTfm.list.dec)

TTfm.list.dec2 <- arrange(TTfm.list.dec, -tt1946fm_mean)[c(1:100), ]

TTfm.dec.postww2 <- ggplot(dat = TTfm.list.dec2, aes(y = ttfm_mean, x = reorder(cdec, ttfm_mean))) + geom_pointrange(aes(y = ttfm_mean, x = reorder(cdec, ttfm_mean), ymin = ttfm_lower, ymax = ttfm_upper))  + coord_flip() + ylab("Latent territorial threat (1816-2016 model)") + xlab("") + theme_bw()

TT1946fm.dec.postww2 <- ggplot(dat = TTfm.list.dec2, aes(y = tt1946fm_mean, x = reorder(cdec, tt1946fm_mean))) + geom_pointrange(aes(y = tt1946fm_mean, x = reorder(cdec, tt1946fm_mean), ymin = tt1946fm_lower, ymax = tt1946fm_upper))  + coord_flip() + ylab("Latent territorial threat (1946-2016 model)") + xlab("") + theme_bw()


pdf("Output/Figures/ttdec_dotplot.pdf", width = 10, height = 13)
TTfm.dec.postww2
dev.off()

pdf("Output/Figures/tt1946dec_dotplot.pdf", width = 10, height = 13)
TT1946fm.dec.postww2
dev.off()

### By country (post-WWII)

TT.list.c <- dplyr::summarize(group_by(TTfm.list[TTfm.list$year > 1945, ], cname2, add = FALSE), 
                       ttfm_mean = max(max_terrthreatfm_mcpp) * 100, ttfm_lower = max(max_terrthreatfm_mcpp10) * 100, ttfm_upper = max(max_terrthreatfm_mcpp90) * 100,
                       tt1946fm_mean = max(max_terrthreatfm_mc1946pp) * 100, tt1946fm_lower = max(max_terrthreatfm_mc1946pp10) * 100, tt1946fm_upper = max(max_terrthreatfm_mc1946pp90) * 100)

rows <- nrow(TT.list.c)
midrowshi <- rows/2 + 0.5
midrowslo <- rows/2 - 0.5

# again, based on 1816-

TT.list.chi <- arrange(TT.list.c, -ttfm_mean)[c(1:midrowshi), ]
TT.list.clo <- arrange(TT.list.c, -ttfm_mean)[c(midrowslo:rows), ]

TT.c.postww2hi <- ggplot(dat = TT.list.chi, aes(y = ttfm_mean, x = reorder(cname2, ttfm_mean))) + geom_pointrange(aes(y = ttfm_mean, x = reorder(cname2, ttfm_mean), ymin = ttfm_lower, ymax = ttfm_upper))  + coord_flip() + ylab("Latent territorial threat") + xlab("") + theme_bw()
TT.c.postww2lo <-  ggplot(dat = TT.list.clo, aes(y = ttfm_mean, x = reorder(cname2, ttfm_mean))) + geom_pointrange(aes(y = ttfm_mean, x = reorder(cname2, ttfm_mean), ymin = ttfm_lower, ymax = ttfm_upper))  + coord_flip() + ylab("Latent territorial threat") + xlab("") + theme_bw()


pdf("Output/Figures/tthi_dotplot.pdf", width = 10, height = 14)
TT.c.postww2hi
dev.off()

pdf("Output/Figures/ttlo_dotplot.pdf", width = 10, height = 14)
TT.c.postww2lo
dev.off()

# ... and based on 1946-

TT.list.chi <- arrange(TT.list.c, -tt1946fm_mean)[c(1:midrowshi), ]
TT.list.clo <- arrange(TT.list.c, -tt1946fm_mean)[c(midrowslo:rows), ]

TT.c.postww2hi <- ggplot(dat = TT.list.chi, aes(y = tt1946fm_mean, x = reorder(cname2, tt1946fm_mean))) + geom_pointrange(aes(y = tt1946fm_mean, x = reorder(cname2, tt1946fm_mean), ymin = tt1946fm_lower, ymax = tt1946fm_upper))  + coord_flip() + ylab("Latent territorial threat") + xlab("") + theme_bw()
TT.c.postww2lo <-  ggplot(dat = TT.list.clo, aes(y = tt1946fm_mean, x = reorder(cname2, tt1946fm_mean))) + geom_pointrange(aes(y = tt1946fm_mean, x = reorder(cname2, tt1946fm_mean), ymin = tt1946fm_lower, ymax = tt1946fm_upper))  + coord_flip() + ylab("Latent territorial threat") + xlab("") + theme_bw()


pdf("Output/Figures/tt1946hi_dotplot.pdf", width = 10, height = 14)
TT.c.postww2hi
dev.off()

pdf("Output/Figures/tt1946lo_dotplot.pdf", width = 10, height = 14)
TT.c.postww2lo
dev.off()

# Scatterplot to compare the two measures

cor(TT.pred$max_terrthreatfm_mcpp, TT.pred$max_terrthreatfm_mc1946pp, use = "complete.obs")
# [1] 0.9863356

TT.c.comparison <- ggplot(data = TT.pred, aes(x = max_terrthreatfm_mcpp, y = max_terrthreatfm_mc1946pp)) + 
    geom_abline(intercept = 0, slope = 1, size = 0.25) + 
    geom_point(size = 0.1) +  
    ylab("Latent territorial threat, based on 1946-2016 predictive model") + 
    xlab("Latent territorial threat, based on 1816-2016 predictive model") + 
    theme_bw()

pdf("Output/Figures/tt-tt1946_comparison.pdf", width = 6, height = 6)
TT.c.comparison
dev.off()

#################################################
# End of script
#################################################

# > sessionInfo()
# R version 4.0.3 (2020-10-10)
# Platform: x86_64-apple-darwin17.0 (64-bit)
# Running under: macOS Big Sur 10.16
# 
# Matrix products: default
# LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
# 
# locale:
#   [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
# 
# attached base packages:
#   [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
#   [1] knitr_1.30         pastecs_1.3.21     countrycode_1.2.0  cowplot_1.1.0      separationplot_1.3 MASS_7.3-53       
# [7] Hmisc_4.4-2        Formula_1.2-4      survival_3.2-7     lattice_0.20-41    RColorBrewer_1.1-2 rstanarm_2.21.1   
# [13] Rcpp_1.0.6         BayesPostEst_0.2.1 xtable_1.8-4       reshape2_1.4.4     forcats_0.5.0      stringr_1.4.0     
# [19] dplyr_1.0.2        purrr_0.3.4        readr_1.4.0        tidyr_1.1.2        tibble_3.0.4       ggplot2_3.3.2     
# [25] tidyverse_1.3.0    foreign_0.8-80    
# 
# loaded via a namespace (and not attached):
#   [1] utf8_1.1.4           tidyselect_1.1.0     lme4_1.1-26          htmlwidgets_1.5.3    grid_4.0.3           devtools_2.3.2      
# [7] miscTools_0.6-26     jtools_2.1.1         munsell_0.5.0        codetools_0.2-16     statmod_1.4.35       DT_0.16             
# [13] miniUI_0.1.1.1       withr_2.3.0          colorspace_2.0-0     highr_0.8            rstudioapi_0.13      stats4_4.0.3        
# [19] ROCR_1.0-11          gbRd_0.4-11          bayesplot_1.7.2      labeling_0.4.2       Rdpack_2.1           rstan_2.21.2        
# [25] mnormt_2.0.2         clarkeTest_0.1.0     farver_2.0.3         rprojroot_2.0.2      coda_0.19-4          vctrs_0.3.6         
# [31] generics_0.1.0       xfun_0.20            R6_2.5.0             markdown_1.1         VGAM_1.1-4           reshape_0.8.8       
# [37] bitops_1.0-6         assertthat_0.2.1     promises_1.1.1       scales_1.1.1         nnet_7.3-14          texreg_1.37.5       
# [43] gtable_0.3.0         processx_3.4.5       sandwich_3.0-0       rlang_0.4.10         splines_4.0.3        checkmate_2.0.0     
# [49] rjags_4-10           broom_0.7.3          inline_0.3.17        yaml_2.2.1           abind_1.4-5          modelr_0.1.8        
# [55] unmarked_1.0.1       threejs_0.3.3        crosstalk_1.1.0.1    backports_1.2.1      httpuv_1.5.4         rsconnect_0.8.16    
# [61] DAMisc_1.6.2         usethis_2.0.0        tools_4.0.3          psych_2.0.12         optiscale_1.2        ellipsis_0.3.1      
# [67] raster_3.4-5         R2WinBUGS_2.1-21     sessioninfo_1.1.1    ggridges_0.5.2       plyr_1.8.6           base64enc_0.1-3     
# [73] ps_1.5.0             prettyunits_1.1.1    rpart_4.1-15         zoo_1.8-8            cluster_2.1.0        haven_2.3.1         
# [79] fs_1.5.0             survey_4.0           magrittr_2.0.1       data.table_1.13.6    openxlsx_4.2.3       colourpicker_1.1.0  
# [85] lmtest_0.9-38        reprex_0.3.0         effects_4.2-0        tmvnsim_1.0-2        matrixStats_0.57.0   pkgload_1.1.0       
# [91] hms_0.5.3            shinyjs_2.0.0        mime_0.9             evaluate_0.14        shinystan_2.5.0      rio_0.5.16          
# [97] jpeg_0.1-8.1         readxl_1.3.1         gridExtra_2.3        rstantools_2.1.1     testthat_3.0.1       compiler_4.0.3      
# [103] bdsmatrix_1.3-4      V8_3.4.0             crayon_1.3.4         minqa_1.2.4          StanHeaders_2.21.0-7 htmltools_0.5.0     
# [109] later_1.1.0.1        RcppParallel_5.0.2   lubridate_1.7.9.2    DBI_1.1.0            ggmcmc_1.5.0         dbplyr_2.0.0        
# [115] boot_1.3-25          AICcmodavg_2.3-1     Matrix_1.2-18        car_3.0-10           cli_2.2.0            mitools_2.4         
# [121] rbibutils_2.0        parallel_4.0.3       insight_0.11.1       igraph_1.2.6         pkgconfig_2.0.3      sp_1.4-4            
# [127] R2jags_0.6-1         xml2_1.3.2           dygraphs_1.1.1.6     plm_2.2-5            rvest_0.3.6          snakecase_0.11.0    
# [133] callr_3.5.1          digest_0.6.27        janitor_2.0.1        rmarkdown_2.6        cellranger_1.1.0     htmlTable_2.1.0     
# [139] maxLik_1.4-6         curl_4.3             shiny_1.5.0          gtools_3.8.2         nloptr_1.2.2.2       lifecycle_0.2.0     
# [145] nlme_3.1-149         jsonlite_1.7.2       carData_3.0-4        desc_1.2.0           fansi_0.4.1          pillar_1.4.7        
# [151] GGally_2.0.0         loo_2.4.1            fastmap_1.0.1        httr_1.4.2           pkgbuild_1.2.0       remotes_2.2.0       
# [157] glue_1.4.2           xts_0.12.1           zip_2.1.1            png_0.1-7            shinythemes_1.1.2    pander_0.6.3        
# [163] stringi_1.5.3        memoise_1.1.0        latticeExtra_0.6-29  caTools_1.18.0 
